PAN and catchment boundaries analysis¶

In [33]:
import geopandas as gpd
import pandas as pd
import os
## Absolute path of the current working directory (os.path.abspath("") resolves the
## empty path against the CWD). The data files below are read from "<this path>/data/",
## so the notebook has to be started from the folder that contains "data/".
python_directory = os.path.abspath("")
from tqdm import tqdm
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import folium
import copy
import math
import statistics
import numpy as np
import warnings
## NOTE(review): this silences every warning for the whole notebook, including
## deprecation warnings raised by the libraries used below - consider narrowing the filter.
warnings.filterwarnings("ignore")

from models import Optimise_PANs_LSOAs, Optimise_PANs_Schools, Optimise_PANsCatchment_Schools, Optimise_PANsCatchment_LSOAs, reset_parameters, create_custom_legend_handles

Data preparation¶

This includes adding the information required for models in the analysis phase. This includes the estimated number of 5 year olds:

  • Approximately 20% of the 5 to 9 year olds in each LSOA are considered 5 year olds
  • This generates an estimated total of 2433 5 year olds
  • This estimate is acceptable as it aligns with Dennett's report in this link https://adamdennett.github.io/BH_Secondary_Admissions_Analysis/BH_Sec_Sch_Analysis.html
In [34]:
## Every layer is analysed in the British National Grid (EPSG:27700), a projected CRS,
## so that the Euclidean distances used by the models are planar distances
TARGET_EPSG = 27700

## load the PANs
PANs = pd.read_csv(rf"{python_directory}/data/Yr7_admissions.csv")

## load the maps
schools = gpd.read_file(rf"{python_directory}/data/brighton_sec_schools.geojson")
lsoa = gpd.read_file(rf"{python_directory}/data/BrightonLSOA_Clean.geojson")
## `students` is a second, independent read of the same LSOA file; it is the frame that
## is handed to reset_parameters() and the models below, so `lsoa` stays as loaded
students = gpd.read_file(rf"{python_directory}/data/BrightonLSOA_Clean.geojson")
catchment_a = gpd.read_file(rf"{python_directory}/data/catchment_01.geojson")
catchment_b = gpd.read_file(rf"{python_directory}/data/catchment_02.geojson")
catchment_c = gpd.read_file(rf"{python_directory}/data/catchment_03.geojson")
catchment_list = [catchment_a, catchment_b, catchment_c]

## transform schools, lsoa and students dataframes to EPSG:27700 (For the UK).
## `schools` is reprojected explicitly as well instead of relying on the file already
## being stored in the target CRS (a no-op when it is)
for layer in (schools, lsoa, students):
    layer.to_crs(epsg=TARGET_EPSG, inplace=True)
for catchment in catchment_list:
    ## buffer(0) rebuilds each geometry (presumably to repair invalid hand-drawn polygons)
    catchment["geometry"] = catchment.buffer(0)
    catchment.to_crs(epsg=TARGET_EPSG, inplace=True)

## Print the current CRS
print(f"CRS | schools = {schools.crs}, LSOA = {lsoa.crs}, students LSOA = {students.crs}")
CRS | schools = EPSG:27700, LSOA = EPSG:27700, students LSOA = EPSG:27700

Visualise the number of estimated 5 year olds in Brighton and the school locations.¶

In [35]:
## One fixed colour per school, reused by every map and legend in this notebook
colours = {
    "Blatchington Mill School": "steelblue",
    "Brighton Aldridge Community Academy": "orange",
    "Cardinal Newman Catholic School": "limegreen",
    "Dorothy Stringer School": "firebrick",
    "Hove Park School and Sixth Form Centre": "mediumpurple",
    "King's School": "sienna",
    "Longhill High School": "palevioletred",
    "Patcham High School": "gray",
    "Portslade Aldridge Community Academy": "darkkhaki",
    "Varndean School": "darkturquoise",
}

## Look up the colour of every school by its name (a KeyError means a school is missing above)
schools["colour"] = [colours[school_name] for school_name in schools["establishment_name"]]

## reset_parameters() is what provides the "5_est" column plotted below
reset_parameters(catchment_a, schools, students)

## Interactive map: LSOAs shaded by the estimated number of 5 year olds, schools on top
m = students.explore(
    column="5_est",
    tooltip="5_est",
    name="Estimated 5 year olds",
    legend=True,
    min_zoom=12,
)
schools.explore(
    m=m,
    column="establishment_name",
    tooltip="establishment_name",
    color=schools["colour"],
    name="Schools",
    marker_kwds={"radius": 5},
    style_kwds={"fillOpacity": 1},
    legend=True,
)

display(m)
## drop the reference to the map once it has been rendered
m = None
Make this Notebook Trusted to load map: File -> Trust Notebook

Assign LSOAs to catchment zones¶

We consider the three catchment zone alternatives (labelled "Catchment alternative 1", "Catchment alternative 2" and "Catchment alternative 3" in the layers of the plotted map). Each LSOA is assigned to the catchment zone which contains the majority of the LSOA. This leads to cases where the LSOA catchment zones do not strictly align with the drawn catchment zones (as the drawn catchment zones do not strictly follow the outlines of LSOAs)

In [36]:
## Plot interactive map for all the three catchments:
## one LSOA layer plus one outline layer per alternative, stacked on a single folium map
m = None
for alternative_no, catchment in enumerate((catchment_a, catchment_b, catchment_c), start=1):
    ## refresh the working columns of `students` (incl. catchment_ID) for this alternative
    reset_parameters(catchment, schools, students)
    lsoa_layer_kwds = {
        "column": "catchment_ID",
        "categorical": True,
        "cmap": "tab10",
        "tooltip": "catchment_ID",
        "legend": False,
        "name": f"LSOA alternative {alternative_no}",
    }
    if m is None:
        ## the first layer creates the map, so the map-level option (min_zoom) is passed here
        m = students.explore(min_zoom=12, **lsoa_layer_kwds)
    else:
        students.explore(m=m, **lsoa_layer_kwds)
    catchment.explore(
        m=m,
        tooltip="catchment_ID",
        style_kwds={"fill": False, "color": "black"},
        name=f"Catchment alternative {alternative_no}",
    )

schools.explore(
    m=m,
    column="establishment_name",
    tooltip="establishment_name",
    color=schools["colour"],
    name="Schools",
    marker_kwds={"radius": 5},
    style_kwds={"fillOpacity": 1},
    legend=False,
)

folium.LayerControl().add_to(m)
print("All catchment alternatives overlayed (hide layers in interactive map)")
display(m)
m = None
All catchment alternatives overlayed (hide layers in interactive map)
Make this Notebook Trusted to load map: File -> Trust Notebook

Analysis: Optimisation model for school catchments¶

The analysis uses a model that assigns students to schools using Euclidean distances as the driver for the assignment process. The model iterates through schools and selects the closest LSOA within the school's catchment zone. It assigns the students in that LSOA to the school. This process is repeated across all schools until taking any further LSOA would exceed the respective school's allocated PAN.

In more detail, the model applies the following workflow:

  • Order the schools starting from "Dorothy Stringer School" followed by the next closest school (School B), School B is then followed by its next closest (excluding "Dorothy Stringer School"), and so on.
  • Each one of the 10 schools selects one LSOA and takes in all the 5 year olds within it if that does not lead to exceeding its PAN
  • We repeat for as many rounds as needed until all schools have almost reached their PANs

The model generates a spatial distribution of school catchments that may not align with the catchment zones due to PAN restrictions. By comparing the results to the catchment zones, we can identify:

  • geospatial areas where the catchment zones may force students to enlist in schools at longer distances from their LSOA;
  • number of students assigned to each school from outside its catchment area
  • median distance travelled by students inside and outside the catchment area of each school

This allows for making conclusions on the suitability of the catchment areas and PAN restrictions used in 2024.

Catchment alternative 1¶

The LSOAs are assigned to the catchment zone where the majority of the LSOA lies. The LSOA-catchment areas map is shown as follows:

In [37]:
## Assign every LSOA in `students` to a zone of catchment alternative 1 and show the result
reset_parameters(catchment_a, schools, students)
print("Attributes created and reset in all DataFrames")
print("--------------------")
print("LAOAs visualised based on catchment area ID")

## Layer names now say "alternative 1": they were copy-pasted as "alternative 2",
## which mislabelled the entries of the layer control in this section
m = students.explore(column="catchment_ID", categorical=True, cmap="tab10", tooltip="catchment_ID",  legend=True, name="LSOA alternative 1", min_zoom=12)
catchment_a.explore(m=m, tooltip="catchment_ID", style_kwds={"fill": False, "color": "black"}, name="Catchment alternative 1")
schools.explore(
    m=m, column="establishment_name", tooltip="establishment_name", color=schools["colour"], name="Schools", 
    marker_kwds={"radius": 5}, style_kwds={"fillOpacity":1}, legend=False
    )
folium.LayerControl().add_to(m)
## last expression of the cell: renders the map as the cell output
m
Attributes created and reset in all DataFrames
--------------------
LAOAs visualised based on catchment area ID
Out[37]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [38]:
## Run the model for catchment alternative 1.
## Both calls work on `schools` / `students` in place: nothing is returned here and the
## following cells read the result columns directly from these two frames.
reset_parameters(
    catchment=catchment_a,
    schools=schools,
    students=students,
)
Optimise_PANsCatchment_Schools(
    PANs=PANs,
    schools=schools,
    students_lsoa=students,
)
print("Finalised model run")
Finalised model run

Key observations¶

The students in LSOAs are generally assigned to a school within their catchment areas. Yet, we observe a few notable exceptions:

  • "Cardinal Newman"
    • This school attracts more than approximately 60 students from eastern and northern LSOAs outside of its own catchment area
    • This implies that schools close to these eastern LSOAs ("Dorothy Stringer" and "Varndean") and northern areas ("Patcham") have been saturated given the 2024 PANs
    • This leads to a significantly high distance travelled for students attending "Cardinal Newman" outside of their catchment area
  • "Blatchington"
    • This school attracts approximately 10 students from northern LSOAs outside of its own catchment area
    • This implies the school in the northern area ("Patcham") cannot take all the students within its catchment area given the 2024 PANs
  • "Dorothy"
    • This school attracts approximately 25 students from a northern LSOA
    • Similarly, this implies "Patcham" in the north cannot take all the students in its catchment area
In [39]:
## Static map of the model result: every LSOA takes the colour of the school it was
## assigned to; schools and the outlines of catchment alternative 1 are drawn on top
students["colour"] = [colours[assigned_school] for assigned_school in students["school"]]
schools["colour"] = [colours[school_name] for school_name in schools["establishment_name"]]

## GeoDataFrame.plot returns the matplotlib Axes that the remaining layers are added to
ax = students.plot(color=students["colour"], legend=True, figsize=(20,10))
schools.plot(ax=ax, color=schools["colour"], edgecolor='black', markersize=60, legend=True)
catchment_a.plot(ax=ax, facecolor="none", edgecolor="black", label="Catchment")
ax.set_title("Catchment alternative 1 model results")
## the legend is built by hand from the school -> colour mapping
ax.legend(handles=create_custom_legend_handles(colours))
plt.show()
plt.close()
In [40]:
## plot target PAN numbers
## Factor applied to dist_to_school to express it in miles (the column is presumably in
## metres, the unit of EPSG:27700 - confirm in models.py)
METRES_TO_MILES = 0.000621371

## create ordered lists (one entry per school, in the order of the PANs table)
students_total_plot, PANs_2024_plot, students_outside_catchment = [], [], []
distances, distances_outside_catchment = [], []
x_axis_list = list(PANs["school"])
for school_str in x_axis_list:
    ## .item() returns the single matching value and raises if the school is missing or
    ## duplicated; calling int() directly on a one-element Series is deprecated in pandas
    students_total_plot.append(int(schools.loc[schools["establishment_name"] == school_str, "students_total"].item()))
    PANs_2024_plot.append(int(PANs.loc[PANs["school"] == school_str, "pan2024"].item()))
    LSOAs_of_school = students[students["school"] == school_str]
    ## find all LSOAs outside the catchment
    LSOAs_outside_catchment = LSOAs_of_school[LSOAs_of_school["catchment_ID"] != LSOAs_of_school["catchment_ID_school"]]
    students_outside_catchment.append(sum(LSOAs_outside_catchment["5_est"]))
    ## NOTE(review): both medians are taken over LSOAs (one distance per LSOA); they are
    ## not weighted by the number of students living in each LSOA
    for median_list, lsoa_subset in ((distances, LSOAs_of_school), (distances_outside_catchment, LSOAs_outside_catchment)):
        if len(lsoa_subset.index) > 0:
            median_list.append(statistics.median(lsoa_subset["dist_to_school"]) * METRES_TO_MILES)
        else:
            ## no LSOA in this group: draw an empty bar instead of failing on an empty median
            median_list.append(0)

## Generate the graphs
x_axis = np.arange(len(x_axis_list))
width = 0.25

## Chart 1: assigned students vs. 2024 PAN vs. students coming from outside the catchment
fig, ax = plt.subplots(figsize=(7,5))
ax.set_title("Alternative 1: Model results compared to 2024 PANs")
ax.bar(x_axis - width, students_total_plot, width, color="steelblue", label="Model students")
ax.bar(x_axis, PANs_2024_plot, width, color="orange", label="2024 PANs")
ax.bar(x_axis + width, students_outside_catchment, width, color="red", label="Model students outside catchment")
ax.set_xticks(x_axis)
ax.set_xticklabels(x_axis_list, rotation=90)
ax.set_ylabel("Students")
ax.legend()
plt.show()
plt.close(fig)

## Chart 2: median distance to the assigned school, overall and for outside-catchment LSOAs
fig, ax = plt.subplots(figsize=(7,5))
ax.set_title("Alternative 1: Median distances travelled")
ax.bar(x_axis - width / 2, distances, width, color="steelblue", label="All students")
ax.bar(x_axis + width / 2, distances_outside_catchment, width, color="red", label="Students outside catchment")
ax.set_xticks(x_axis)
ax.set_xticklabels(x_axis_list, rotation=90)
ax.set_ylabel("Distance (miles)")
ax.legend()
plt.show()
plt.close(fig)

Catchment alternative 2¶

The LSOAs are assigned to the catchment zone where the majority of the LSOA lies. The LSOA-catchment areas map is shown as follows:

In [41]:
## Assign every LSOA in `students` to a zone of catchment alternative 2 and show the result
reset_parameters(catchment_b, schools, students)
print(
    "Attributes created and reset in all DataFrames",
    "--------------------",
    "LAOAs visualised based on catchment area ID",
    sep="\n",
)

## LSOAs coloured by catchment ID, with the drawn catchment outlines and the schools on top
outline_style = {"fill": False, "color": "black"}
m = students.explore(
    column="catchment_ID",
    categorical=True,
    cmap="tab10",
    tooltip="catchment_ID",
    legend=True,
    name="LSOA alternative 2",
    min_zoom=12,
)
catchment_b.explore(m=m, tooltip="catchment_ID", style_kwds=outline_style, name="Catchment alternative 2")
schools.explore(
    m=m,
    column="establishment_name",
    tooltip="establishment_name",
    color=schools["colour"],
    name="Schools",
    marker_kwds={"radius": 5},
    style_kwds={"fillOpacity": 1},
    legend=False,
)
folium.LayerControl().add_to(m)
## last expression of the cell: renders the map as the cell output
m
Attributes created and reset in all DataFrames
--------------------
LAOAs visualised based on catchment area ID
Out[41]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [42]:
## Run the model for catchment alternative 2.
## Both calls work on `schools` / `students` in place: nothing is returned here and the
## following cells read the result columns directly from these two frames.
reset_parameters(
    catchment=catchment_b,
    schools=schools,
    students=students,
)
Optimise_PANsCatchment_Schools(
    PANs=PANs,
    schools=schools,
    students_lsoa=students,
)
print("Finalised model run")
Finalised model run

Key observations¶

The students in LSOAs are generally assigned to a school within their catchment areas. Yet, we observe a few notable exceptions:

  • "Cardinal Newman"
    • This school attracts more than 100 students from eastern LSOAs outside of its own catchment area
    • This implies that schools close to these eastern LSOAs (relative to "Cardinal Newman") have been saturated given the 2024 PANs
    • This includes "Dorothy Stringer" and "Varndean"
    • This leads to a significantly high distance travelled for students attending "Cardinal Newman" outside of their catchment area
  • "Blatchington"
    • This school attracts 23 students from western LSOAs outside of its own catchment area
    • This implies that "Portslade Aldridge" cannot take all the students within its catchment area given its PAN

Another observation to note is that "Dorothy Stringer" has a high median distance travelled by its students. This is because its catchment area is confined to areas south of the school; meaning, very proximate areas in the north are forced into other schools. This means that "Dorothy Stringer" attracts distant students in the south.

In [43]:
## Static map of the model result: every LSOA takes the colour of the school it was
## assigned to; schools and the outlines of catchment alternative 2 are drawn on top
students["colour"] = [colours[assigned_school] for assigned_school in students["school"]]
schools["colour"] = [colours[school_name] for school_name in schools["establishment_name"]]

## GeoDataFrame.plot returns the matplotlib Axes that the remaining layers are added to
ax = students.plot(color=students["colour"], legend=True, figsize=(20,10))
schools.plot(ax=ax, color=schools["colour"], edgecolor='black', markersize=60, legend=True)
catchment_b.plot(ax=ax, facecolor="none", edgecolor="black", label="Catchment")
ax.set_title("Catchment alternative 2 model results")
## the legend is built by hand from the school -> colour mapping
ax.legend(handles=create_custom_legend_handles(colours))
plt.show()
plt.close()
In [44]:
## plot target PAN numbers
## Factor applied to dist_to_school to express it in miles (the column is presumably in
## metres, the unit of EPSG:27700 - confirm in models.py)
METRES_TO_MILES = 0.000621371

## create ordered lists (one entry per school, in the order of the PANs table)
students_total_plot, PANs_2024_plot, students_outside_catchment = [], [], []
distances, distances_outside_catchment = [], []
x_axis_list = list(PANs["school"])
for school_str in x_axis_list:
    ## .item() returns the single matching value and raises if the school is missing or
    ## duplicated; calling int() directly on a one-element Series is deprecated in pandas
    students_total_plot.append(int(schools.loc[schools["establishment_name"] == school_str, "students_total"].item()))
    PANs_2024_plot.append(int(PANs.loc[PANs["school"] == school_str, "pan2024"].item()))
    LSOAs_of_school = students[students["school"] == school_str]
    ## find all LSOAs outside the catchment
    LSOAs_outside_catchment = LSOAs_of_school[LSOAs_of_school["catchment_ID"] != LSOAs_of_school["catchment_ID_school"]]
    students_outside_catchment.append(sum(LSOAs_outside_catchment["5_est"]))
    ## NOTE(review): both medians are taken over LSOAs (one distance per LSOA); they are
    ## not weighted by the number of students living in each LSOA
    for median_list, lsoa_subset in ((distances, LSOAs_of_school), (distances_outside_catchment, LSOAs_outside_catchment)):
        if len(lsoa_subset.index) > 0:
            median_list.append(statistics.median(lsoa_subset["dist_to_school"]) * METRES_TO_MILES)
        else:
            ## no LSOA in this group: draw an empty bar instead of failing on an empty median
            median_list.append(0)

## Generate the graphs
x_axis = np.arange(len(x_axis_list))
width = 0.25

## Chart 1: assigned students vs. 2024 PAN vs. students coming from outside the catchment
fig, ax = plt.subplots(figsize=(7,5))
ax.set_title("Alternative 2: Model results compared to 2024 PANs")
ax.bar(x_axis - width, students_total_plot, width, color="steelblue", label="Model students")
ax.bar(x_axis, PANs_2024_plot, width, color="orange", label="2024 PANs")
ax.bar(x_axis + width, students_outside_catchment, width, color="red", label="Model students outside catchment")
ax.set_xticks(x_axis)
ax.set_xticklabels(x_axis_list, rotation=90)
ax.set_ylabel("Students")
ax.legend()
plt.show()
plt.close(fig)

## Chart 2: median distance to the assigned school, overall and for outside-catchment LSOAs
fig, ax = plt.subplots(figsize=(7,5))
ax.set_title("Alternative 2: Median distances travelled")
ax.bar(x_axis - width / 2, distances, width, color="steelblue", label="All students")
ax.bar(x_axis + width / 2, distances_outside_catchment, width, color="red", label="Students outside catchment")
ax.set_xticks(x_axis)
ax.set_xticklabels(x_axis_list, rotation=90)
ax.set_ylabel("Distance (miles)")
ax.legend()
plt.show()
plt.close(fig)

Catchment alternative 3¶

The LSOAs are assigned to the catchment zone where the majority of the LSOA lies. The LSOA-catchment areas map is shown as follows:

In [45]:
## Assign every LSOA in `students` to a zone of catchment alternative 3 and show the result
reset_parameters(catchment_c, schools, students)
print("Attributes created and reset in all DataFrames")
print("--------------------")
print("LAOAs visualised based on catchment area ID")

## Layer names now say "alternative 3": they were copy-pasted as "alternative 2",
## which mislabelled the entries of the layer control in this section
m = students.explore(column="catchment_ID", categorical=True, cmap="tab10", tooltip="catchment_ID",  legend=True, name="LSOA alternative 3", min_zoom=12)
catchment_c.explore(m=m, tooltip="catchment_ID", style_kwds={"fill": False, "color": "black"}, name="Catchment alternative 3")
schools.explore(
    m=m, column="establishment_name", tooltip="establishment_name", color=schools["colour"], name="Schools", 
    marker_kwds={"radius": 5}, style_kwds={"fillOpacity":1}, legend=False
    )
folium.LayerControl().add_to(m)
## last expression of the cell: renders the map as the cell output
m
Attributes created and reset in all DataFrames
--------------------
LAOAs visualised based on catchment area ID
Out[45]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [46]:
## Run the model for catchment alternative 3.
## Both calls work on `schools` / `students` in place: nothing is returned here and the
## following cells read the result columns directly from these two frames.
reset_parameters(
    catchment=catchment_c,
    schools=schools,
    students=students,
)
Optimise_PANsCatchment_Schools(
    PANs=PANs,
    schools=schools,
    students_lsoa=students,
)
print("Finalised model run")
Finalised model run

Key observations¶

The students in LSOAs are generally assigned to a school within their catchment areas. Yet, we observe a few notable exceptions:

  • "Dorothy" and "Varndean"
    • It is unclear which catchment area "Varndean" school lies in under this alternative
    • The current iteration assigns it to "Dorothy's" catchment area
    • This leads to a significant competition between both schools in a small catchment area
    • This keeps "Varndean" and "Dorothy" unsaturated after addressing all the students in their catchment
    • This gives room to students outside of their catchment areas, which explains the high number of external students
  • "Blatchington Mill"
    • This school attracts approximately 20 students from western LSOAs outside of its own catchment area
    • This implies that "Portslade Aldridge" cannot take all the students within its catchment area given its PAN
  • "Cardinal Newman"
    • This school attracts approximately 100 students from eastern and western LSOAs outside of its own catchment area
    • This implies that "Longhill" in the east has been saturated given the 2024 PANs and cannot take all the students in its catchment
    • It also implies that "King's" and "Hove Park" cannot cover all the students in their catchment as well
In [47]:
## Static map of the model result: every LSOA takes the colour of the school it was
## assigned to; schools and the outlines of catchment alternative 3 are drawn on top
students["colour"] = [colours[assigned_school] for assigned_school in students["school"]]
schools["colour"] = [colours[school_name] for school_name in schools["establishment_name"]]

## GeoDataFrame.plot returns the matplotlib Axes that the remaining layers are added to
ax = students.plot(color=students["colour"], legend=True, figsize=(20,10))
schools.plot(ax=ax, color=schools["colour"], edgecolor='black', markersize=60, legend=True)
catchment_c.plot(ax=ax, facecolor="none", edgecolor="black", label="Catchment")
ax.set_title("Catchment alternative 3 model results")
## the legend is built by hand from the school -> colour mapping
ax.legend(handles=create_custom_legend_handles(colours))
plt.show()
plt.close()
In [48]:
## plot target PAN numbers
## Factor applied to dist_to_school to express it in miles (the column is presumably in
## metres, the unit of EPSG:27700 - confirm in models.py)
METRES_TO_MILES = 0.000621371

## create ordered lists (one entry per school, in the order of the PANs table)
students_total_plot, PANs_2024_plot, students_outside_catchment = [], [], []
distances, distances_outside_catchment = [], []
x_axis_list = list(PANs["school"])
for school_str in x_axis_list:
    ## .item() returns the single matching value and raises if the school is missing or
    ## duplicated; calling int() directly on a one-element Series is deprecated in pandas
    students_total_plot.append(int(schools.loc[schools["establishment_name"] == school_str, "students_total"].item()))
    PANs_2024_plot.append(int(PANs.loc[PANs["school"] == school_str, "pan2024"].item()))
    LSOAs_of_school = students[students["school"] == school_str]
    ## find all LSOAs outside the catchment
    LSOAs_outside_catchment = LSOAs_of_school[LSOAs_of_school["catchment_ID"] != LSOAs_of_school["catchment_ID_school"]]
    students_outside_catchment.append(sum(LSOAs_outside_catchment["5_est"]))
    ## NOTE(review): both medians are taken over LSOAs (one distance per LSOA); they are
    ## not weighted by the number of students living in each LSOA
    for median_list, lsoa_subset in ((distances, LSOAs_of_school), (distances_outside_catchment, LSOAs_outside_catchment)):
        if len(lsoa_subset.index) > 0:
            median_list.append(statistics.median(lsoa_subset["dist_to_school"]) * METRES_TO_MILES)
        else:
            ## no LSOA in this group: draw an empty bar instead of failing on an empty median
            median_list.append(0)

## Generate the graphs
x_axis = np.arange(len(x_axis_list))
width = 0.25

## Chart 1: assigned students vs. 2024 PAN vs. students coming from outside the catchment
fig, ax = plt.subplots(figsize=(7,5))
ax.set_title("Alternative 3: Model results compared to 2024 PANs")
ax.bar(x_axis - width, students_total_plot, width, color="steelblue", label="Model students")
ax.bar(x_axis, PANs_2024_plot, width, color="orange", label="2024 PANs")
ax.bar(x_axis + width, students_outside_catchment, width, color="red", label="Model students outside catchment")
ax.set_xticks(x_axis)
ax.set_xticklabels(x_axis_list, rotation=90)
ax.set_ylabel("Students")
ax.legend()
plt.show()
plt.close(fig)

## Chart 2: median distance to the assigned school, overall and for outside-catchment LSOAs
fig, ax = plt.subplots(figsize=(7,5))
ax.set_title("Alternative 3: Median distances travelled")
ax.bar(x_axis - width / 2, distances, width, color="steelblue", label="All students")
ax.bar(x_axis + width / 2, distances_outside_catchment, width, color="red", label="Students outside catchment")
ax.set_xticks(x_axis)
ax.set_xticklabels(x_axis_list, rotation=90)
ax.set_ylabel("Distance (miles)")
ax.legend()
plt.show()
plt.close(fig)